//*************************************************************************************************
//
//	Description:
//		MotionBlur2.fx
//
//	<P> Copyright (c) 2008 Blimey! Games Ltd. All rights reserved.
//
//	Author: 
//		Alastair Murray
//
//	History:
//
//	<TABLE>
//		\Author         Date        Version       Description
//		--------        -----       --------      ------------
//		AMurray		    01/03/2008  0.1           Created
//	<TABLE>
//
//*************************************************************************************************

#include "stddefs.fxh"

// Quality selection. When the build supplies none of HIGH_QUALITY / MEDIUM_QUALITY /
// LOW_QUALITY, medium is used. If several are defined, the #if/#elif chain below
// picks the first match (high, then medium, then low).
#if !defined(HIGH_QUALITY) && !defined(MEDIUM_QUALITY) && !defined(LOW_QUALITY)
#define	MEDIUM_QUALITY
#endif

// Dither pattern selection (the pixel shaders test these with #ifdef):
//   NO_DITHER            - no per-pixel jitter of the sample positions
//   USE_2x2_DITHER_ONLY  - 4 distinct jitter values
//   USE_4x4_DITHER_ONLY  - 16 distinct jitter values
//   none of the above    - 64 distinct jitter values
//#define NO_DITHER
//#define USE_2x2_DITHER_ONLY
#define USE_4x4_DITHER_ONLY

// SAMPLES is the total tap count per pixel. offsets[] holds the SAMPLES-1 non-centre
// tap positions (in units of one velocity step) read by MotionBlurUsingMaskPS; the
// centre tap (offset 0) is taken separately there, so each table must have exactly
// SAMPLES-1 entries.
#if defined(HIGH_QUALITY)
	#define	SAMPLES		19
	const float offsets[SAMPLES-1] = { -5.0f, -3.5f, -2.5f, -2.0f, -1.5f, -1.0f, -0.75f, -0.5f, -0.25f, 0.25f, 0.5f, 0.75f, 1.0f, 1.5f, 2.0f, 2.5f, 3.5f, 5.0f };
#elif defined(MEDIUM_QUALITY)
	#ifdef NO_DITHER
		// Without dithering two extra taps are taken. The table is sized to match
		// (7 entries) and continues the unit-spaced pattern of the 6- and 4-tap
		// tables: one more tap ahead of the centre than behind it.
		#define	SAMPLES		8
		const float offsets[SAMPLES-1] = { -3.0f, -2.0f, -1.0f, 1.0f, 2.0f, 3.0f, 4.0f };
	#else
		#define	SAMPLES		6
		const float offsets[SAMPLES-1] = { -2.0f, -1.0f, 1.0f, 2.0f, 3.0f };
	#endif
#elif defined(LOW_QUALITY)
	#define	SAMPLES		4
	const float offsets[SAMPLES-1] = { -1.0f, 1.0f, 2.0f };
#endif

// On Xbox the depth input is always treated as coming from the z-buffer. Besides
// selecting a path in CalcDepth, this also disables the "depth == 0 means far plane"
// patch-up in the pixel shaders.
#ifdef _XBOX
	#define _DEPTH_FROM_ZBUFFER_
#endif

// Fraction of the (clamped) velocity covered by one tap step.
// Parenthesised so the macro stays a single factor wherever it is expanded
// (e.g. "x / MULVEL" or "a - MULVEL" would otherwise bind incorrectly).
#define	MULVEL		(1.0f/SAMPLES)

// Scale applied to the clip-space position delta to obtain the velocity used as a
// UV offset; the sign differs per axis.
#define	VEL_MULTIPLIER	float2(-0.25f,0.25f)	// can shorten blur by making this <1.0f

// Upper bound on the length of the velocity vector after scaling.
#define	MAX_VEL			0.03f	// max velocity



// Colour input for the blur: the texture is bound by the application, and
// sceneInputTex is the sampler the pixel shaders read it through.
// Sampling is clamped at the edges, bilinear for min/mag, with no mip filtering.
texture sceneTex : TEXTURE;
sampler sceneInputTex : SAMPLER = sampler_state
{
	Texture = < sceneTex >;
	AddressU  = Clamp;
	AddressV  = Clamp;
	MinFilter = Linear;
	MagFilter = Linear;
	// SET_NO_ANISOTROPY and the FX_SAMPLERSTATE_* macros are not defined in this file
	// (presumably they come from stddefs.fxh - confirm); their expansion is opaque here,
	// so their position relative to the other states is left untouched.
	SET_NO_ANISOTROPY
#ifdef _PS3_
	// PS3 reads the scene through the sRGB sampler-state macro, other platforms use the linear one.
	FX_SAMPLERSTATE_SRGB_TEXTURE
#else
	FX_SAMPLERSTATE_LINEAR_TEXTURE
#endif
	MipFilter = None;
};

// Set by the application. In MotionBlurVS_WithViewport, .xy scales a 0..1 screen
// position into the coordinates used for dithering, and .zw is subtracted from that
// result on platforms other than Xbox/PS3 as a half-pixel workaround.
// NOTE(review): going by the name, .xy is the scene resolution and .zw an
// inverse-half-resolution term - confirm against the code that sets it.
float4 sceneResAndInvHalfRes;

// Depth input: the texture is bound by the application and decoded by CalcDepth;
// MaskFromStencilPS also reads its blue channel.
// Clamped addressing, no mip filtering. PS3 uses point filtering, other platforms
// use bilinear.
texture depthTex : TEXTURE;
sampler depthInputTex : SAMPLER = sampler_state
{
	// The FX_SAMPLERSTATE_* / SET_NO_ANISOTROPY macros are not defined in this file
	// (presumably from stddefs.fxh - confirm); their order is left as authored.
	FX_SAMPLERSTATE_LINEAR_TEXTURE
	Texture = < depthTex >;
	AddressU  = Clamp;
	AddressV  = Clamp;
#ifdef _PS3_
	// CalcDepth rebuilds depth from separate 8-bit channels on PS3, so the channels are
	// fetched unfiltered.
	MinFilter = Point;
	MagFilter = Point;
#else
	MinFilter = Linear;
	MagFilter = Linear;
#endif	
	MipFilter = None;
	SET_NO_ANISOTROPY
};

// Viewport transform applied to the full-screen quad's 0..1 texture coordinate in the
// vertex shaders: uv = texCoord * viewportScale + viewportOrigin.
float2 viewportOrigin;
float2 viewportScale;


// Vertex input of the full-screen quad.
// Every platform supplies a position; all platforms except PS3 also supply a texture
// coordinate (the PS3 vertex shaders derive it from the position instead).
struct VSINPUT
{
	float3 position : POSITION;
#ifndef _PS3_
	float2 texCoord : TEXCOORD0;
#endif
};

// Interpolants passed from MotionBlurVS_WithViewport to the motion blur pixel shaders.
struct VSOUTPUT
{
	float4 position : POSITION;

	// xy = viewport-adjusted texture coordinate,
	// zw = the untransformed coordinate remapped to -1..1 (Y flipped)
	float4 texCoord : TEXCOORD0;

#if !defined(NO_DITHER)
	// xy = screen coordinate * 0.5, zw = screen coordinate * 0.25 (drives the dither pattern)
	float4 halfAndQuarterScreenCoords : TEXCOORD1;
#endif

	// (texCoord.zw, 0, 1) transformed by prevViewProj; the pixel shader adds the
	// depth-dependent part.
	float4 prevPosWithoutTheDepth : TEXCOORD2;
};

// Interpolants produced by MaskFromStencilVS_WithViewport.
// (Not to be confused with VSOUTPUT_MASK further down, which carries a position only.)
struct VSOUTPUTMASK
{
	float4 position : POSITION;

	// xy = viewport-adjusted texture coordinate,
	// zw = the untransformed coordinate remapped to -1..1 (Y flipped)
	float4 texCoord : TEXCOORD0;
};


// Reprojection inputs, set by the application.
// prevViewProj maps a current-frame position (x, y, 0, 1) to the previous frame in the
// vertex shader; according to the shader comments it is the inverse view-projection
// combined with the previous frame's view-projection.
// prevViewProjThirdRow is multiplied by the per-pixel depth and added in the pixel shader.
// NOTE(review): by name this is the third row of prevViewProj (the row that multiplies
// z in a row-vector mul) - confirm against the code that sets it.
float4x4	prevViewProj;
float4    prevViewProjThirdRow;

// Vertex shader for the motion blur passes.
// Passes the quad position through unchanged and prepares, per vertex:
//   - the viewport-adjusted UV plus a -1..1 screen position (texCoord),
//   - the scaled screen coordinates that drive the dither pattern (unless NO_DITHER),
//   - the depth-independent part of the previous-frame position.
VSOUTPUT MotionBlurVS_WithViewport( VSINPUT _input )
{
	VSOUTPUT output;

	output.position = float4( _input.position.xyz, 1.0f );

	// 0..1 coordinate across the quad: derived from the position on PS3,
	// taken from the vertex stream everywhere else
#ifdef _PS3_
	float2 unitUV;
	unitUV.x = _input.position.x*0.5f+0.5f;
	unitUV.y = _input.position.y*-0.5f+0.5f;
#else
	float2 unitUV = _input.texCoord;
#endif

	// xy: apply the viewport transform; zw: remap to -1..1 with Y flipped
	output.texCoord.xy = ( unitUV * viewportScale ) + viewportOrigin;
	output.texCoord.z = (unitUV.x*2.0f)-1.0f;
	output.texCoord.w = ((1.0f-unitUV.y)*2.0f)-1.0f;

#ifndef NO_DITHER
	float2 ditherCoords = sceneResAndInvHalfRes.xy * ( 0.5f * output.texCoord.zw + 0.5f );

#if !defined(_XBOX) && !defined(_PS3_)
	// half-pixel workaround, only applied when neither console platform is targeted
	ditherCoords -= sceneResAndInvHalfRes.zw;
#endif

	// pre-scaled by 1/2 (xy) and 1/4 (zw) so the pixel shader can use frac() directly
	output.halfAndQuarterScreenCoords = ditherCoords.xyxy * float4( 0.5, 0.5, 0.25, 0.25 );
#endif

	// Reproject the position with depth 0; the pixel shader adds
	// prevViewProjThirdRow * depth to complete the transform.
	float4 posAtZeroDepth = float4( output.texCoord.zw, 0.0f, 1.0f );
	output.prevPosWithoutTheDepth = mul( posAtZeroDepth, prevViewProj );

	return output;
}

// Vertex shader for the WriteMaskFromDepthStencil pass.
// Passes the quad position through and outputs the viewport-adjusted UV (xy) together
// with the -1..1, Y-flipped screen position (zw).
VSOUTPUTMASK MaskFromStencilVS_WithViewport( VSINPUT _input )
{
	VSOUTPUTMASK output;

	output.position = float4( _input.position.xyz, 1.0f );

	// 0..1 coordinate across the quad: derived from the position on PS3,
	// taken from the vertex stream everywhere else
#ifdef _PS3_
	float2 unitUV;
	unitUV.x = _input.position.x*0.5f+0.5f;
	unitUV.y = _input.position.y*-0.5f+0.5f;
#else
	float2 unitUV = _input.texCoord;
#endif

	output.texCoord.xy = ( unitUV * viewportScale ) + viewportOrigin;
	output.texCoord.z = (unitUV.x*2.0f)-1.0f;
	output.texCoord.w = ((1.0f-unitUV.y)*2.0f)-1.0f;

	return output;
}

// Decodes a depth value from a texel of the depth texture.
//   PS3                             : rebuilds a 24-bit value from the a, r and g channels
//                                     (8 bits each) and rescales it from 0..1 to -1..1
//   _DEPTH_FROM_ZBUFFER_ (non-Xbox) : weighted sum of the colour channels
//   otherwise (including Xbox)      : the x channel as-is
float	CalcDepth( float4 depthTex )
{
	float	depth;

#if defined(_PS3_)
	// a = high byte, r = middle byte, g = low byte of the 24-bit value
	const float3 depthFactor = float3(65536.0f / 16777215.0f, 256.0f / 16777215.0f, 1.0f / 16777215.0f);
	depth = dot(round(float3(depthTex.a, depthTex.r, depthTex.g) * 255.0f), depthFactor);

	// Need to rescale due to GL matrix
	depth = depth * 2.0f - 1.0f;
#elif !defined(_XBOX) && defined(_DEPTH_FROM_ZBUFFER_)
	depth = dot( depthTex, float3( 0.99609375, 3.89099121e-3, 1.51991844e-5 ) );
#else
	depth = depthTex.x;
#endif

	return depth;
}

// Full-screen motion blur.
// Reprojects the pixel into the previous frame using its depth, turns the screen-space
// delta into a clamped velocity, and averages SAMPLES evenly spaced taps of the scene
// along that velocity. Unless NO_DITHER is defined, the whole tap row is shifted by a
// per-pixel jitter of less than one step.
COLOUR_OUTPUT_TYPE	MotionBlurPS( float4 texCoord : TEXCOORD0
#ifndef NO_DITHER
	, float4 halfAndQuarterScreenCoords: TEXCOORD1
#endif
	, float4 prevPosWithoutTheDepth: TEXCOORD2
 ) : COLOR0
{
	const float2	uv = texCoord.xy;

	// depth of this pixel
	float	d = CalcDepth( tex2D( depthInputTex, uv ) );
#ifndef _DEPTH_FROM_ZBUFFER_
	// a depth of exactly zero is replaced by 1
	if( d==0.0f )
	{
		d = 1.0f;
	}
#endif

	// this pixel's position (in view-proj space)
	const float4	currentPos = float4( texCoord.z, texCoord.w, d, 1.0f );

	// where this pixel was last frame: the vertex shader supplied the depth-independent
	// part, the depth-dependent part is added here before the perspective divide
	float4	prevPos = prevPosWithoutTheDepth + prevViewProjThirdRow * d;
	prevPos /= prevPos.w;

	// velocity from the pixel last frame to the pixel now
	float2	velocity = (currentPos - prevPos) * VEL_MULTIPLIER;

/* -- disabled by the original author: no significant visible difference
#if !defined(LOW_QUALITY) 
	// slow down velocity at edges to avoid problems when trying to project off the screen
	float	edge = min(min(prevPos.y,prevPos.x),-prevPos.x);	// miss out -prevPos.y cos we don't worry too much about the top of the screen having this problem
	float	mul = saturate(2.0f * edge + 3.0f);
	mul = (mul*0.9f)+0.1f;
	velocity *= mul;
#endif*/

	// clamp the velocity length to MAX_VEL (the epsilon guards the division at zero velocity)
	const float	speed = length(velocity);
	velocity *= (min(MAX_VEL, speed) / (speed+0.000001f));

	// distance between two neighbouring taps
	const float2	stepUV = velocity * MULVEL;

#if !defined(NO_DITHER)

	// Build an ordered-dither index from the pixel position.
	// The diagrams below assume the interpolated screen coordinates land on whole pixels.
	//
	// Stage 1: position inside a 2x2 cell. frac() of the half-res coordinate is 0 or 0.5,
	// so the dot product with (2,4) yields
	//   0 1
	//   2 3
	const float2	halfFrac = frac( halfAndQuarterScreenCoords.xy );
	float	jitter = dot( halfFrac, float2(2,4) );

#if defined(USE_2x2_DITHER_ONLY)
	// 4 levels spread over [-0.5, 0.25]; +0.5 is not needed, it equals the neighbour's -0.5
	jitter = jitter * 0.25 - 0.5;
#else

	// Stage 2: which 2x2 cell inside the 4x4 block. frac() of the quarter-res coordinate
	// is 0, 0.25, 0.5 or 0.75; the "> 0.4" test extracts one bit per axis (x y):
	//   00 00 10 10
	//   00 00 10 10
	//   01 01 11 11
	//   01 01 11 11
	// Combined with stage 1 the index becomes
	//   0  4   1  5
	//   8  12  9  13
	//   2  6   3  7
	//   10 14  11 15
	const float2	quarterBits = (float2)( frac( halfAndQuarterScreenCoords.zw ) > 0.4f );
	jitter = 4 * jitter + ( quarterBits.x + 2 * quarterBits.y );

#if defined(USE_4x4_DITHER_ONLY)
	// 16 levels spread over [-0.5, 0.5 - 0.0625]
	jitter = jitter * 0.0625 - 0.5;
#else

	// Stage 3: which 4x4 block inside the 8x8 tile (x y):
	//   00 00 00 00 10 10 10 10   (4 rows)
	//   01 01 01 01 11 11 11 11   (4 rows)
	const float2	eighthBits = (float2)( frac( halfAndQuarterScreenCoords.zw * 0.5f ) > 0.4f );
	jitter = 4 * jitter + ( eighthBits.x + 2 * eighthBits.y );

	// 64 levels spread over [-0.5, 0.5 - 0.015625]
	jitter = jitter * 0.015625 - 0.5;

#endif

#endif

	//return float4( ( jitter + 0.5f ).xxx, 1);

	// first tap: half the tap row back along the velocity, shifted by the jitter
	float2	tapUV = uv - ( jitter + 0.5 * SAMPLES ) * stepUV;

#else
	// no dithering: first tap is half the tap row back along the velocity
	float2	tapUV = uv - ( 0.5 * SAMPLES ) * stepUV;
#endif

	// box filter over SAMPLES taps
	float4	col = tex2D( sceneInputTex, tapUV );
	for( int tap = 1; tap < SAMPLES; ++tap )
	{
		tapUV += stepUV;
		col += tex2D( sceneInputTex, tapUV );
	}
	col /= SAMPLES;

	float4	output = col;

// debug visualisations left by the original author (some refer to names that no longer exist)
//output.rgb = tex2D( depthInputTex, uv );
//output.rgb = -pixelPos.y;
//output.rgb = 0; output.rgb = saturate(velocity.y)*2.0f;
//output.rgb = saturate(length(velocity/1.0));
//output.rgb *= mul;
//if( vel_len_sq>MAX_VEL*MAX_VEL ) {output.rgb = 1; }

	return output;
}

// Motion blur that respects a mask stored in the scene texture's alpha channel.
//
// The velocity is derived exactly as in MotionBlurPS. The centre texel is then fetched:
//   - if its alpha is not greater than 0 it is returned untouched (no blur);
//   - otherwise the SAMPLES-1 taps listed in offsets[] are gathered around it. A tap only
//     contributes when its alpha is 1 or more, and the result is normalised by the
//     number of contributing taps plus the centre texel.
// Unless NO_DITHER is defined, the gathered taps (not the centre texel) are shifted by
// a per-pixel jitter of less than one step.
COLOUR_OUTPUT_TYPE	MotionBlurUsingMaskPS( float4 texCoord : TEXCOORD0
#ifndef NO_DITHER
	, float4 halfAndQuarterScreenCoords: TEXCOORD1
#endif
	, float4 prevPosWithoutTheDepth: TEXCOORD2
 ) : COLOR0
{
	float4	output;
	
	// get textures
	const float2	uv = texCoord.xy;
	float4	depthTex = tex2D( depthInputTex, uv );

	// get this pixel's position (in view-proj space)
	float 	d = CalcDepth( depthTex );
#ifndef _DEPTH_FROM_ZBUFFER_
	// a depth of exactly zero is replaced by 1
	if( d==0.0f )
	{
		d = 1.0f;
	}
#endif
	float4	currentPos = float4( texCoord.z, texCoord.w, d, 1.0f );

	// find where this pixel was last frame: the vertex shader supplied the
	// depth-independent part, the depth-dependent part is added here
	float4	prevPos = prevPosWithoutTheDepth;
	prevPos += prevViewProjThirdRow * d;
	prevPos	/= prevPos.w;

	// calc velocity from pixel to pixel last frame
	float2	velocity = (currentPos - prevPos) * VEL_MULTIPLIER;
	
/* -- disabled by the original author: no significant visible difference
#if !defined(LOW_QUALITY) 
	// slow down velocity at edges to avoid problems when trying to project off the screen
	float	edge = min(min(prevPos.y,prevPos.x),-prevPos.x);	// miss out -prevPos.y cos we don't worry too much about the top of the screen having this problem
	float	mul = saturate(2.0f * edge + 3.0f);
	mul = (mul*0.9f)+0.1f;
	velocity *= mul;
#endif*/

	// limit velocity to MAX_VEL (the epsilon guards the division at zero velocity)
	float vel_len = length(velocity);
	velocity *= (min(MAX_VEL, vel_len) / (vel_len+0.000001f));

	// distance between two neighbouring taps
	float2	velocity_step = velocity * MULVEL;

#if !defined(NO_DITHER)
	
	// Build an ordered-dither index from the pixel position.
	// The diagrams below assume the interpolated screen coordinates land on whole pixels.
	float2 fracCoords = frac( halfAndQuarterScreenCoords.xy ); // we use half screen coords, so the frac is either 0 or 0.5
	float2 dither = fracCoords * float2(2, 4);
	//
	// 00 10 00 10
	// 02 12 02 12
	// 00 10 00 10
	// 02 12 02 12
	//
	
	// we need 1D values
	float ditherOffset = dither.x + dither.y;
	// 0  1  0  1
	// 2  3  2  3
	// 0  1  0  1
	// 2  3  2  3

#	ifdef USE_2x2_DITHER_ONLY
		// values around zero: -.5,-.25,0.0,.25 ... -0.5 is the same as the neighbour pixel's +0.5, so 0.5 is not needed in the kernel
		ditherOffset = ditherOffset * 0.25 - 0.5;
#	else

		fracCoords = frac( halfAndQuarterScreenCoords.zw ); // we use quarter screen coords, so the frac is either 0, or 0.25, or 0.5 or 0.75
		dither = (float2)( fracCoords > 0.4f );
		//
		// one bit per axis (x y):
		// 00 00 10 10
		// 00 00 10 10
		// 01 01 11 11
		// 01 01 11 11
		//
		ditherOffset = 4 * ditherOffset + ( dither.x + 2 * dither.y );
		
		// 0  4   1  5
		// 8  12  9  13
		// 2  6   3  7
		// 10 14  11 15

#		ifdef USE_4x4_DITHER_ONLY
		// values around zero: -.5, ... ,0.5-0.0625; -0.5 is the same as the neighbour pixel's +0.5, so 0.5 is not needed in the kernel
		ditherOffset = ditherOffset * 0.0625 - 0.5;
#		else

		dither = (float2)( frac( halfAndQuarterScreenCoords.zw * 0.5f ) > 0.4f );
		//
		// 00 00 00 00 10 10 10 10
		// 00 00 00 00 10 10 10 10
		// 00 00 00 00 10 10 10 10
		// 00 00 00 00 10 10 10 10
		// 01 01 01 01 11 11 11 11
		// 01 01 01 01 11 11 11 11
		// 01 01 01 01 11 11 11 11
		// 01 01 01 01 11 11 11 11
		//
		ditherOffset = 4 * ditherOffset + ( dither.x + 2 * dither.y );

		ditherOffset = ditherOffset * 0.015625 - 0.5;
		
#		endif

#	endif

	//return float4( ( ditherOffset + 0.5f ).xxx, 1);

#else
	// No dithering: the gather below still reads ditherOffset, so it must exist in
	// this configuration as well. Zero leaves the tap positions unshifted.
	const float ditherOffset = 0.0f;
#endif	

	// the centre texel is always read at the undithered position
	float2	uvsam = uv;

	float4	col = tex2D( sceneInputTex, uvsam );
	if( col.a>0.0f )
	{
		// shift the gather origin by the per-pixel jitter
		uvsam += - ditherOffset * velocity_step;

		// the centre texel counts with weight one
		float		sum = 1.0f;
		#ifdef _XBOX
		[unroll(SAMPLES-1)]
		#endif
		for( int i=0; i<SAMPLES-1; i++ )
		{
			float2 offsetuv = uvsam + offsets[i]*velocity_step;
			float4	col2 = tex2D( sceneInputTex, offsetuv );
			col2.a = step( 1.0f, col2.a );	// make alpha 0 if it's anything BUT one (as bilinear filtering may mean we've half a bad pixel)
			col += col2*col2.a;
			sum += col2.a;
		}
		col /= sum;
	}

	// output
	output = col;

// debug visualisations left by the original author (some refer to names that no longer exist)
//output.rgb = tex2D( depthInputTex, uv );
//output.rgb = -pixelPos.y;
//output.rgb = 0; output.rgb = saturate(velocity.y)*2.0f;
//output.rgb = saturate(length(velocity/1.0));
//output.rgb *= mul;
//if( vel_len_sq>MAX_VEL*MAX_VEL ) {output.rgb = 1; }

	return output;
}



// Output of MaskVS: a position only, as the constant-colour mask pixel shaders need
// no interpolants. (Distinct from VSOUTPUTMASK, which also carries texture coordinates.)
struct VSOUTPUT_MASK
{
	float4 position : POSITION;
};

// Pass-through vertex shader for the constant-colour mask passes:
// forwards the input position with w set to 1.
VSOUTPUT_MASK MaskVS( VSINPUT _input )
{
	VSOUTPUT_MASK result;
	result.position = float4( _input.position, 1.0f );
	return result;
}

// Writes 1 to every channel (used by the ClearMaskBeforeStencil technique).
COLOUR_OUTPUT_TYPE	MaskClearPS() : COLOR0
{
	const float4	allOnes = float4( 1.0f, 1.0f, 1.0f, 1.0f );
	return allOnes;
}

// Writes 0 to every channel (used by the CreateMaskFromStencil technique).
COLOUR_OUTPUT_TYPE	MaskPS() : COLOR0
{
	const float4	allZeros = float4( 0.0f, 0.0f, 0.0f, 0.0f );
	return allZeros;
}

// Derives the mask from the blue channel of the depth texture.
// The channel is scaled from 0..1 to 0..255 and subtracted from 1, and the result is
// written to all four channels: a blue value of 0 gives 1, a value of 1/255 gives 0,
// anything larger goes negative.
// NOTE(review): the original comment labels the scaled blue channel as the stencil
// value - confirm the depth-stencil texture layout.
COLOUR_OUTPUT_TYPE	MaskFromStencilPS( float4 uv : TEXCOORD0 ) : COLOR0
{
	const float	stencil = tex2D( depthInputTex, uv.xy ).b * 255.0f;

	float4	mask = 1.0f - stencil;
	return mask;
}





// Unmasked motion blur: MotionBlurVS_WithViewport + MotionBlurPS in a single pass,
// with depth test/write, alpha blending and alpha test all disabled.
technique MotionBlur
{
	pass Pass0
	{
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		// PS3 build: culling is switched off explicitly and the RSX profiles are used
		CullFaceEnable=false;
		VertexShader = compile sce_vp_rsx MotionBlurVS_WithViewport();
		PixelShader = compile sce_fp_rsx MotionBlurPS();
#else		
		VertexShader = compile vs_3_0 MotionBlurVS_WithViewport();
		PixelShader = compile ps_3_0 MotionBlurPS();
#endif
	}
}

// Masked motion blur: MotionBlurVS_WithViewport + MotionBlurUsingMaskPS in a single
// pass, with depth test/write, alpha blending and alpha test all disabled.
// The pixel shader reads the mask from the scene texture's alpha channel.
technique MotionBlurUsingMask
{
	pass Pass0
	{
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		// PS3 build: culling is switched off explicitly and the RSX profiles are used
		CullFaceEnable=false;
		VertexShader = compile sce_vp_rsx MotionBlurVS_WithViewport();
		PixelShader = compile sce_fp_rsx MotionBlurUsingMaskPS();
#else		
		VertexShader = compile vs_3_0 MotionBlurVS_WithViewport();
		PixelShader = compile ps_3_0 MotionBlurUsingMaskPS();
#endif
	}
}


// Fills the render target with 1 in every channel (MaskVS + MaskClearPS), with depth
// test/write, alpha blending and alpha test all disabled.
// NOTE(review): by name this is meant to run before CreateMaskFromStencil - confirm in
// the calling code.
technique ClearMaskBeforeStencil
{
	pass Pass0
	{
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		// PS3 build: culling is switched off explicitly and the RSX profiles are used
		CullFaceEnable=false;
		VertexShader = compile sce_vp_rsx MaskVS();
		PixelShader = compile sce_fp_rsx MaskClearPS();
#else		
		VertexShader = compile vs_3_0 MaskVS();
		PixelShader = compile ps_3_0 MaskClearPS();
#endif
	}
}


// Writes 0 to every channel of the covered pixels (MaskVS + MaskPS), with depth
// test/write, alpha blending and alpha test all disabled.
// No stencil state is set in this pass.
// NOTE(review): presumably the application configures the stencil test so that only
// the masked pixels are written - confirm in the calling code.
technique CreateMaskFromStencil
{
	pass Pass0
	{
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		// PS3 build: culling is switched off explicitly and the RSX profiles are used
		CullFaceEnable=false;
		VertexShader = compile sce_vp_rsx MaskVS();
		PixelShader = compile sce_fp_rsx MaskPS();
#else		
		VertexShader = compile vs_3_0 MaskVS();
		PixelShader = compile ps_3_0 MaskPS();
#endif
	}
}

// Builds the mask by sampling the depth texture (MaskFromStencilVS_WithViewport +
// MaskFromStencilPS), with depth test/write, alpha blending and alpha test all disabled.
// The pixel shader derives the mask value from the depth texture's blue channel.
technique WriteMaskFromDepthStencil
{
	pass Pass0
	{
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		// PS3 build: culling is switched off explicitly and the RSX profiles are used
		CullFaceEnable=false;
		VertexShader = compile sce_vp_rsx MaskFromStencilVS_WithViewport();
		PixelShader = compile sce_fp_rsx MaskFromStencilPS();
#else		
		VertexShader = compile vs_3_0 MaskFromStencilVS_WithViewport();
		PixelShader = compile ps_3_0 MaskFromStencilPS();
#endif
	}
}
